package au.com.acpfg.misc.spectra;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import org.knime.core.data.DataColumnSpec;
import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.def.DoubleCell;
import org.knime.core.data.def.IntCell;
import org.knime.core.data.def.StringCell;
import org.knime.core.node.BufferedDataContainer;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionContext;
import org.knime.core.node.ExecutionMonitor;
import org.knime.core.node.InvalidSettingsException;
import org.knime.core.node.NodeLogger;
import org.knime.core.node.NodeModel;
import org.knime.core.node.NodeSettingsRO;
import org.knime.core.node.NodeSettingsWO;
import org.knime.core.node.defaultnodesettings.SettingsModelBoolean;
import org.knime.core.node.defaultnodesettings.SettingsModelString;
/**
 * This is the model implementation of the spectra reader node.
 * It reads mass spectra from mzXML/mzML files (via the jrap-stax library) and from
 * MGF files found in a user-specified folder, and reports them on two output tables.
 *
 * @author Andrew Cassin
 */
public class SpectraReaderNodeModel extends NodeModel {
// the logger instance
private static final NodeLogger logger = NodeLogger
.getLogger(SpectraReaderNodeModel.class);
/** Settings keys used to store and retrieve the node configuration
 (from the dialog or from a settings file).
 Package visibility so the dialog can reference the same keys. */
static final String CFGKEY_SPECTRA_FOLDER= "spectra-folder";
static final String CFGKEY_LOAD_SPECTRA= "load-spectra";
static final String CFGKEY_MZML = "load-mzml";
static final String CFGKEY_MGF = "load-mgf";
/** initial default folder to scan for spectra files */
static final String DEFAULT_SPECTRA_FOLDER = "c:/temp";
static final boolean DEFAULT_MZML = true;
static final boolean DEFAULT_MGF = true;
// number of columns in scan output
private final static int NUM_SCAN_COLS = 23;
// number of columns in file summary output
private final static int NUM_FILE_COLS = 9;
// settings models shared (by key) with the node dialog; the default dialog
// components read and write their values through these "SettingsModels".
private final SettingsModelString m_folder=new SettingsModelString(CFGKEY_SPECTRA_FOLDER, DEFAULT_SPECTRA_FOLDER);
private final SettingsModelBoolean m_spectra= new SettingsModelBoolean(CFGKEY_LOAD_SPECTRA, false);
private final SettingsModelBoolean m_mzml = new SettingsModelBoolean(CFGKEY_MZML, DEFAULT_MZML);
private final SettingsModelBoolean m_mgf = new SettingsModelBoolean(CFGKEY_MGF, DEFAULT_MGF);
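// As a rough sketch (assuming the companion node dialog is built on the default
// dialog components), the dialog would bind to the same keys roughly like this;
// the component labels and history ID below are illustrative only:
//
//   addDialogComponent(new DialogComponentFileChooser(
//           new SettingsModelString(CFGKEY_SPECTRA_FOLDER, DEFAULT_SPECTRA_FOLDER),
//           "spectra-folder-history", JFileChooser.OPEN_DIALOG, true));
//   addDialogComponent(new DialogComponentBoolean(
//           new SettingsModelBoolean(CFGKEY_LOAD_SPECTRA, false), "Load spectra?"));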
/**
* Constructor for the node model.
*/
protected SpectraReaderNodeModel() {
// no incoming ports, two outgoing ports (scan table, per-file summary table)
super(0, 2);
}
/**
* {@inheritDoc}
*/
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData,
final ExecutionContext exec) throws Exception {
logger.info("Processing mzXML/mzML files in folder: "+ m_folder.getStringValue());
File[] entries = new File(m_folder.getStringValue()).listFiles();
if (entries == null) {
throw new InvalidSettingsException("Unable to read folder: "+m_folder.getStringValue());
}
// if the user requests it, we add an extra column holding the spectra themselves
int extra = 0;
if (m_spectra.getBooleanValue()) {
extra++;
}
// first output port
DataColumnSpec[] allColSpecs = new DataColumnSpec[NUM_SCAN_COLS+extra];
allColSpecs[0] = new DataColumnSpecCreator("Scan Type", StringCell.TYPE).createSpec();
allColSpecs[1] = new DataColumnSpecCreator("Polarity", StringCell.TYPE).createSpec();
allColSpecs[2] = new DataColumnSpecCreator("Retention Time", StringCell.TYPE).createSpec();
allColSpecs[3] = new DataColumnSpecCreator("Base Peak Intensity", DoubleCell.TYPE).createSpec();
allColSpecs[4] = new DataColumnSpecCreator("Base Peak M/Z", DoubleCell.TYPE).createSpec();
allColSpecs[5] = new DataColumnSpecCreator("Centroided?", IntCell.TYPE).createSpec();
allColSpecs[6] = new DataColumnSpecCreator("Deisotoped?", IntCell.TYPE).createSpec();
allColSpecs[7] = new DataColumnSpecCreator("Charge Deconvoluted?", IntCell.TYPE).createSpec();
allColSpecs[8] = new DataColumnSpecCreator("MS Level (2=MS/MS)", IntCell.TYPE).createSpec();
allColSpecs[9] = new DataColumnSpecCreator("Scan ID", StringCell.TYPE).createSpec();
allColSpecs[10] = new DataColumnSpecCreator("Precursor Charge", IntCell.TYPE).createSpec();
allColSpecs[11] = new DataColumnSpecCreator("Precursor Scan Number", IntCell.TYPE).createSpec();
allColSpecs[12] = new DataColumnSpecCreator("Precursor Intensity", DoubleCell.TYPE).createSpec();
allColSpecs[13] = new DataColumnSpecCreator("Precursor M/Z", DoubleCell.TYPE).createSpec();
allColSpecs[14] = new DataColumnSpecCreator("Total Ion Current", DoubleCell.TYPE).createSpec();
allColSpecs[15] = new DataColumnSpecCreator("Collision Energy", DoubleCell.TYPE).createSpec();
allColSpecs[16] = new DataColumnSpecCreator("Ionisation Energy", DoubleCell.TYPE).createSpec();
allColSpecs[17] = new DataColumnSpecCreator("Start M/Z", DoubleCell.TYPE).createSpec();
allColSpecs[18] = new DataColumnSpecCreator("End M/Z", DoubleCell.TYPE).createSpec();
allColSpecs[19] = new DataColumnSpecCreator("Low M/Z", DoubleCell.TYPE).createSpec();
allColSpecs[20] = new DataColumnSpecCreator("High M/Z", DoubleCell.TYPE).createSpec();
allColSpecs[21] = new DataColumnSpecCreator("Filename", StringCell.TYPE).createSpec();
allColSpecs[22] = new DataColumnSpecCreator("Number of peaks", IntCell.TYPE).createSpec();
if (extra == 1) {
allColSpecs[23] = new DataColumnSpecCreator("Spectra", AbstractSpectraCell.TYPE).createSpec();
}
// second output port
DataColumnSpec[] fileSpecs = new DataColumnSpec[NUM_FILE_COLS];
fileSpecs[0] = new DataColumnSpecCreator("Instrument Manufacturer", StringCell.TYPE).createSpec();
fileSpecs[1] = new DataColumnSpecCreator("Instrument Model", StringCell.TYPE).createSpec();
fileSpecs[2] = new DataColumnSpecCreator("Instrument Software", StringCell.TYPE).createSpec();
fileSpecs[3] = new DataColumnSpecCreator("Instrument Operator", StringCell.TYPE).createSpec();
fileSpecs[4] = new DataColumnSpecCreator("Mass Analyzer", StringCell.TYPE).createSpec();
fileSpecs[5] = new DataColumnSpecCreator("Ionization", StringCell.TYPE).createSpec();
fileSpecs[6] = new DataColumnSpecCreator("Detector", StringCell.TYPE).createSpec();
fileSpecs[7] = new DataColumnSpecCreator("Data Processing", StringCell.TYPE).createSpec();
fileSpecs[8] = new DataColumnSpecCreator("Filename", StringCell.TYPE).createSpec();
DataTableSpec outputSpec = new DataTableSpec(allColSpecs);
// the execution context provides storage: a data container to which rows are added
// sequentially. The container can handle arbitrarily large tables, buffering to
// disk if necessary.
BufferedDataContainer container = exec.createDataContainer(outputSpec);
DataTableSpec outputFileSpec = new DataTableSpec(fileSpecs);
BufferedDataContainer file_container = exec.createDataContainer(outputFileSpec);
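// A minimal sketch (assuming the data processors fill cells in the column order
// declared above) of how one scan row ends up in 'container'; the variable names
// here are illustrative, the real work happens inside each processor's process():
//
//   DataCell[] cells = new DataCell[NUM_SCAN_COLS];
//   cells[0] = new StringCell(scanType);           // "Scan Type"
//   cells[3] = new DoubleCell(basePeakIntensity);  // "Base Peak Intensity"
//   // ... remaining cells filled to match allColSpecs ...
//   container.addRowToTable(new DefaultRow(new RowKey("Scan"+n), cells));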
// NB: we don't ask each reader whether it can handle every filename up-front (that
// could be slow with many readers); instead the supported extensions are hard-coded here.
int done = 0;
ArrayList<File> filtered_entries = new ArrayList<File>();
for (File f : entries) {
if (! f.isFile()) {
continue;
}
String name = f.getName().toLowerCase();
if (name.endsWith(".xml") || name.endsWith(".mzxml")
|| name.endsWith(".mzml") || name.endsWith(".mgf") || name.endsWith(".mgf.gz")) {
filtered_entries.add(f);
}
}
int cnt = filtered_entries.size();
logger.info("Found "+cnt+" plausible files for loading.");
// instantiate a data processor for each supported (and enabled) filetype
ArrayList<AbstractDataProcessor> dp_list = new ArrayList<AbstractDataProcessor>();
if (m_mzml.getBooleanValue())
dp_list.add(new mzMLDataProcessor());
if (m_mgf.getBooleanValue())
dp_list.add(new MGFDataProcessor());
/*
 * For each filtered file, try each processor in the order constructed above; the first
 * processor that accepts the file handles it. The shared RowSequence instances below
 * supply row IDs, which must be unique across all files.
 */
RowSequence scan_seq = new RowSequence("Scan");
RowSequence file_seq = new RowSequence("File");
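// Assumed AbstractDataProcessor contract, as exercised by the loop below (a sketch
// only, not the authoritative interface definition):
//
//   boolean can(File f)        -> does this processor recognise the file?
//   void setInput(String path) -> point the processor at the file to load
//   void process(boolean loadSpectra, RowSequence scans, RowSequence files,
//                ExecutionContext exec, BufferedDataContainer scanOut,
//                BufferedDataContainer fileOut)
//                              -> append rows to the two containers
//   void finish()              -> release any per-file resources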
for (File f : filtered_entries) {
String filename = f.getName();
try {
logger.info("Processing file: "+filename);
exec.checkCanceled();
exec.setProgress(((double)done)/cnt, "Processing file "+f.getName());
for (int i=0; i<dp_list.size(); i++) {
AbstractDataProcessor dp = dp_list.get(i);
if (dp.can(f)) {
dp.setInput(f.getAbsolutePath());
dp.process(m_spectra.getBooleanValue(), scan_seq, file_seq,
exec, container, file_container);
dp.finish();
// short-circuit if successfully processed
break;
}
}
} catch (CanceledExecutionException ce) {
container.close();
file_container.close();
throw ce;
} catch (Exception e) {
logger.warn("Unable to process "+filename+": skipping this file.");
logger.warn(e);
}
done++;
exec.setProgress(((double)done)/cnt, "Completed processing file "+f.getName());
}
// once we are done, we close the container and return its table
container.close();
file_container.close();
BufferedDataTable out = container.getTable();
BufferedDataTable out2= file_container.getTable();
return new BufferedDataTable[]{out,out2};
}
/**
* {@inheritDoc}
*/
@Override
protected void reset() {
}
/**
* {@inheritDoc}
*/
@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs)
throws InvalidSettingsException {
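// The output specs could in principle be computed here from the current settings
// (mirroring the column construction in execute()); returning nulls simply defers
// the table specs until execution time.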
return new DataTableSpec[]{null,null};
}
/**
* {@inheritDoc}
*/
@Override
protected void saveSettingsTo(final NodeSettingsWO settings) {
m_folder.saveSettingsTo(settings);
m_spectra.saveSettingsTo(settings);
m_mgf.saveSettingsTo(settings);
m_mzml.saveSettingsTo(settings);
}
/**
* {@inheritDoc}
*/
@Override
protected void loadValidatedSettingsFrom(final NodeSettingsRO settings)
throws InvalidSettingsException {
m_folder.loadSettingsFrom(settings);
m_spectra.loadSettingsFrom(settings);
m_mgf.loadSettingsFrom(settings);
m_mzml.loadSettingsFrom(settings);
}
/**
* {@inheritDoc}
*/
@Override
protected void validateSettings(final NodeSettingsRO settings)
throws InvalidSettingsException {
m_folder.validateSettings(settings);
m_spectra.validateSettings(settings);
m_mgf.validateSettings(settings);
m_mzml.validateSettings(settings);
}
/**
* {@inheritDoc}
*/
@Override
protected void loadInternals(final File internDir,
final ExecutionMonitor exec) throws IOException,
CanceledExecutionException {
// TODO load internal data.
// Everything handed to output ports is loaded automatically (data
// returned by the execute method, models loaded in loadModelContent,
// and user settings set through loadSettingsFrom - is all taken care
// of). Load here only the other internals that need to be restored
// (e.g. data used by the views).
}
/**
* {@inheritDoc}
*/
@Override
protected void saveInternals(final File internDir,
final ExecutionMonitor exec) throws IOException,
CanceledExecutionException {
// TODO save internal models.
// Everything written to output ports is saved automatically (data
// returned by the execute method, models saved in the saveModelContent,
// and user settings saved through saveSettingsTo - is all taken care
// of). Save here only the other internals that need to be preserved
// (e.g. data used by the views).
}
}